#### R SYNTAX ####

#### Loading MLM libraries ####
library(nlme)
library(lme4)

#### Reading in the Data ####
# Tab-delimited file with a header row; "." is the decimal mark.
legislator.data <- read.delim("TobaccoLegislators.dat", sep="\t", dec=".", header=TRUE)
# Give every row a sequential legislator ID. The count is taken from the data
# rather than hard-coded (previously 1:527), so the assignment cannot fail or
# mislabel rows if the file gains or loses observations.
legislator.data$legislator <- seq_len(nrow(legislator.data))

#### Descriptive Statistics ####
# Centrality and Spread
# Prints mean, median, mode, variance, standard deviation and the 25th/75th
# percentiles for money, acres and voting (voting rescaled to a percentage).
# local() keeps the helper out of the global workspace.
local({
  # Print one block of descriptives for the numeric vector `x` under `heading`.
  # Missing values are ignored by every statistic (table() drops NA by default).
  describe.variable <- function(x, heading) {
    cat(heading, "\n", sep="")
    cat("Mean:", mean(x, na.rm=TRUE), "\n")
    cat("Median:", median(x, na.rm=TRUE), "\n")
    # Mode = most frequent observed value (first one in case of ties)
    cat("Mode:", names(which.max(table(x))), "\n")
    cat("Variance:", var(x, na.rm=TRUE), "\n")
    cat("Standard Deviation:", sd(x, na.rm=TRUE), "\n")
    # na.rm=TRUE is required here: quantile() stops with an error on NA input,
    # whereas the other statistics above already skip missing values.
    cat("Interquartile Range:", quantile(x, probs=c(.25, .75), na.rm=TRUE), "\n")
    invisible(NULL)
  }
  with(legislator.data, {
    describe.variable(money, "Descriptive Stats for Money")
    describe.variable(acres, "Descriptive Stats for Acres")
    describe.variable(voting*100, "Descriptive Stats for Voting, Expressed as a Percentage")
  })
})

#### Correlations Among Variables ####
# with() returns (and auto-prints) only its last expression, so each statistic
# is printed explicitly; otherwise the correlation is computed and discarded.
# use="complete.obs" drops any pair with a missing value.
with(legislator.data, {
  print(cor( money, voting, use="complete.obs" ))
  print(cov( money, voting, use="complete.obs" ))
})

#### Data Preparation for GLM: Grand-Mean Center Predictors ####
# Adds three derived columns to legislator.data:
#   house.effectc - chamber indicator with the raw 0 recoded to -1
#   acres.grandc  - acres minus its overall mean (NAs ignored in the mean)
#   money.grandc  - money minus its overall mean (NAs ignored in the mean)
legislator.data <- within(legislator.data, {
  # Senate was coded with a "0" in raw dataset; all other values are kept as-is
  house.effectc <- ifelse(house == 0, -1, house)
  acres.grandc <- acres - mean(acres, na.rm = TRUE)
  money.grandc <- money - mean(money, na.rm = TRUE)
})

#### Example Regression Models, Not Accounting for Dependence ####
# Ordinary least-squares fits that treat every legislator as an independent
# observation (the state grouping is ignored here).

# Example simple regression: voting on grand-mean-centered money
simple.regression <- lm(voting ~ money.grandc, data = legislator.data)
summary(simple.regression)

# Example moderated regression: money, acres and their interaction
moderated.regression <- lm(voting ~ money.grandc * acres.grandc, data = legislator.data)
summary(moderated.regression)


#### Aggregation Demo ####
#First, you have to create an aggregated dataset, where each state is the observation
#Calculate the means for each state, for all variables
# The state column is coerced to a factor first: read.delim() leaves strings as
# character vectors in R >= 4.0, and levels() of a character vector is NULL,
# which would produce an empty data frame. Using the same factor for the labels
# and for tapply() also guarantees the names and the means line up.
AGGREGATE <- with(legislator.data, {
  state.groups <- as.factor(state)
  data.frame(
    state  = levels(state.groups),
    money  = as.numeric(tapply(money, state.groups, mean, na.rm=TRUE)),
    acres  = as.numeric(tapply(acres, state.groups, mean, na.rm=TRUE)),
    voting = as.numeric(tapply(voting, state.groups, mean, na.rm=TRUE))
  )
})
#You still need to center your variables, but it's best to do this after aggregation
# na.rm=TRUE keeps one state with no usable values (mean = NaN) from turning
# every centered score into NaN.
AGGREGATE <- within(AGGREGATE, {
  money <- money - mean(money, na.rm=TRUE)
  acres <- acres - mean(acres, na.rm=TRUE)
})
#Example moderated regression with aggregation
aggregate.regression <- lm( voting~money*acres, data=AGGREGATE)
summary(aggregate.regression)

#### MULTILEVEL MODELLING! ####

#### Data Preparation ####
# Group-mean center the Level 1 predictor (money)
# Step 1: build a lookup table with one row per state holding that state's
#         mean of money (missing values ignored).
group.mean.dataset <- with(legislator.data, {
  state.money.means <- tapply(money, state, mean, na.rm = TRUE)
  data.frame(
    state = names(state.money.means),
    money.group.mean = as.numeric(state.money.means)
  )
})
# Step 2: attach the state mean to every legislator (merge() joins on the
#         columns the two data frames share).
legislator.data <- merge(legislator.data, group.mean.dataset)
# Step 3: subtract the state mean from each legislator's money.
legislator.data$money.groupc <- with(legislator.data, money - money.group.mean)

# Each model below is fitted twice, once with nlme::lme() and once with
# lme4::lmer(); `mixed` is overwritten every time, so only the printed
# summaries persist.

# Multilevel model with random intercept
# Using the lme() function
mixed <- lme(voting ~ money.groupc, random = ~ 1 | state, data = legislator.data)
summary(mixed)
# Using the lmer() function
mixed <- lmer(voting ~ (1 | state) + money.groupc, data = legislator.data)
summary(mixed)

# Moderated multilevel model with random intercept and random slope
# Using the lme() function
mixed <- lme(voting ~ money.groupc * acres.grandc,
             random = ~ 1 + money.groupc | state,
             data = legislator.data)
summary(mixed)
# Using the lmer() function
mixed <- lmer(voting ~ (1 + money.groupc | state) + money.groupc * acres.grandc,
              data = legislator.data)
summary(mixed)

# Moderated multilevel model with random intercept, random slope and a
# covariate (house.effectc)
# Using the lme() function
mixed <- lme(voting ~ house.effectc + money.groupc * acres.grandc,
             random = ~ 1 + money.groupc | state,
             data = legislator.data)
summary(mixed)
# Using the lmer() function
mixed <- lmer(voting ~ (1 + money.groupc | state) + house.effectc + money.groupc * acres.grandc,
              data = legislator.data)
summary(mixed)

# Simple effects testing in multilevel models
# Re-center each predictor one standard deviation away from its mean. Adding
# the SD makes a score of zero correspond to one SD BELOW the mean ("low");
# subtracting the SD makes zero correspond to one SD ABOVE the mean ("high").
legislator.data <- within(legislator.data, {
  low.acres <- acres.grandc + sd(acres.grandc)
  high.acres <- acres.grandc - sd(acres.grandc)
  low.money <- money.groupc + sd(money.groupc)
  high.money <- money.groupc - sd(money.groupc)
})

# Simple effects testing: refit the moderated model once per re-centered
# predictor; the random part is the same in all four fits.
mixed.low.acres <- lme(voting ~ house.effectc + money.groupc * low.acres,
                       random = ~ 1 + money.groupc | state,
                       data = legislator.data)
summary(mixed.low.acres)

mixed.high.acres <- lme(voting ~ house.effectc + money.groupc * high.acres,
                        random = ~ 1 + money.groupc | state,
                        data = legislator.data)
summary(mixed.high.acres)

mixed.low.money <- lme(voting ~ house.effectc + low.money * acres.grandc,
                       random = ~ 1 + money.groupc | state,
                       data = legislator.data)
summary(mixed.low.money)

mixed.high.money <- lme(voting ~ house.effectc + high.money * acres.grandc,
                        random = ~ 1 + money.groupc | state,
                        data = legislator.data)
summary(mixed.high.money)

#### Baseline Model to calculate Pseudo-R^2 and ICC ####
# Intercept-only model with a random intercept per state.
mixed.baseline <- lme( voting~1, random=~1|state, data=legislator.data ); summary(mixed.baseline) #Using the lme() function
mixed.baseline <- lmer( voting~(1|state), data=legislator.data ); summary(mixed.baseline) #Using the lmer() function

#The following syntax will calculate the harmonic mean of the number of legislators in the state, which you can use to calculate level 2 pseudo R^2
# The harmonic mean is computed directly as 1 / mean(1 / n_j), where n_j is the
# number of rows (legislators) in state j. No loaded package provides a
# harmonic.mean() function, and the statistic has to be taken over the
# per-state counts, not over the state identifiers themselves.
average.legislators.state <- with(legislator.data, {
  legislators.per.state <- as.numeric(table(state))
  # Drop empty factor levels: a count of 0 would make 1/n infinite
  legislators.per.state <- legislators.per.state[legislators.per.state > 0]
  round(1 / mean(1 / legislators.per.state))
})
average.legislators.state

#### Calculate Partial R^2 ####
# First, estimate F statistics
mixed <- lme(voting ~ house.effectc + money.groupc * acres.grandc,
             random = ~ 1 + money.groupc | state,
             data = legislator.data)
summary(mixed)
anova.table <- anova(mixed)
anova.table
# Calculating effect size from anova table:
#   (numDF/denDF * F) / (1 + numDF/denDF * F), one value per row of the table
effect.sizes <- with(anova.table, {
  scaled.f <- numDF / denDF * `F-value`
  scaled.f / (1 + scaled.f)
})
# Label each effect size with the term it belongs to
names(effect.sizes) <- row.names(anova.table)
effect.sizes

##### Advanced Applications #####
#3-level model with states nested within parties
# Uses the grand-mean-centered acres.grandc (not raw acres) so that this lme()
# fit matches the lmer() fit below and every other moderated model in the file.
mixed <- lme( voting~house.effectc+money.groupc*acres.grandc,random=~1+money.groupc|party/state, data=legislator.data); summary(mixed) #Using the lme() function
#Note: lmer requires grouping variables to be declared as factors when there are more than two levels
legislator.data <- within(legislator.data, {
  party.factor <- as.factor( party)
  state.factor <- as.factor( state)
})
mixed <- lmer( voting~(1+money.groupc|party.factor/state.factor)+house.effectc+acres.grandc*money.groupc, data=legislator.data); summary(mixed) #Using the lmer() function

#### Nested Growth Curve Models ####
#Note: This is HYPOTHETICAL DATA. Pretend that each legislator's vote on all pro-tobacco legislation was recorded (instead of the overall percentage) and the sequential order of each vote was stored in a variable called "time".
# NOTE(review): these two calls only run on a long-format dataset that really
# contains a `time` column; legislator.data is used as a stand-in name - confirm.
# Both fits specify the same model: fixed effects money*time, plus a random
# intercept and random time slope for states and for legislators within states.
mixed.growth <- lme(voting~money*time, random=~1+time|state/legislator, data=legislator.data) #Using the lme() function
# Random-effect terms are added to the formula, never multiplied into the fixed
# effects; (1+time|g) already includes the random intercept for g.
mixed.growth <- lmer(voting~money*time+(1+time|state)+(1+time|state:legislator), data=legislator.data) #Using the lmer() function

#### Generalized Linear Mixed Models ####
# glmer() is lme4's fitting function for non-Gaussian responses; passing
# family= to lmer() is deprecated.

#Generalized Multilevel Models - Poisson
poisson.mlm <- glmer(n_reps~(1|state)+gsp, family=poisson, data=legislator.data);summary(poisson.mlm)

#Generalized Linear Mixed Models - Multilevel logistic regression
# The following model only has a random intercept
logistic.mlm <- glmer( party~(1|state)+voting, family=binomial(link=logit), data=legislator.data);summary(logistic.mlm)
# The following model has a random intercept and random slope
logistic.mlm <- glmer( party~(1+voting|state)+voting, family=binomial(link=logit), data=legislator.data);summary(logistic.mlm)

#### Bootstrapping MLM ####
# Refit the random-intercept model with lmer(), then draw 5000 simulated
# response vectors from the fitted model.
mixed <- lmer(voting ~ (1 | state) + money.groupc, data = legislator.data)
summary(mixed)
boot.mixed <- simulate(mixed, nsim = 5000)

library(boot)
# Statistic function for boot(): refits the model on one resample and returns
# its fixed-effect coefficients.
#   data    - the full data frame handed to boot()
#   indices - row indices selecting the current resample
# Returns the fixed-effect estimates in formula order: intercept,
# acres.grandc, money.groupc, acres.grandc:money.groupc.
boot.lmer <- function(data, indices) {
  resampled <- data[indices, ]
  fit <- lme(voting ~ acres.grandc * money.groupc,
             random = ~ 1 | state,
             data = resampled)
  fixef(fit)
}
# bootstrapping with 1000 replications
bootstrapped.mlm <- boot(data = legislator.data, statistic = boot.lmer, R = 1000)

# Percentile 95% confidence intervals; `index` picks the coefficient in the
# order returned by boot.lmer().
# Intercept
boot.ci(boot.out = bootstrapped.mlm, type = "perc", index = 1)
# Slope of acres
boot.ci(boot.out = bootstrapped.mlm, type = "perc", index = 2)
# Slope of money
boot.ci(boot.out = bootstrapped.mlm, type = "perc", index = 3)
# Slope of acres X money
boot.ci(boot.out = bootstrapped.mlm, type = "perc", index = 4)



#### Cross-classification ####
# Cross-classification in 2 groups: party and state each get their own random
# intercept and random money.groupc slope (the two groupings are not nested).
cross.classified <- lmer(
  voting ~ (1 + money.groupc | party) + (1 + money.groupc | state) +
    house.effectc + money.groupc * acres.grandc,
  data = legislator.data
)
summary(cross.classified)
